package src;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

import lucene.LuceneConstants;
/**
 * Writes an inverse-document-frequency (IDF) table for a word list, using
 * document frequencies obtained from a {@link SearchFaq} opened on an index
 * directory.
 */
public class TfIdf {

	/**
	 * Computes the IDF of every word listed in {@code unigram} and writes the
	 * result to {@code idfFile}.
	 *
	 * <p>The input file is read as UTF-8, one word per line. For each word
	 * whose document frequency (as reported by {@code SearchFaq.docFreq}) is
	 * non-zero, a line of the form {@code word :: idf} is written to the
	 * output file, where {@code idf = log10(totalDocs / docFreq)}. Words with
	 * a document frequency of zero are not written to the output file; they
	 * are printed to standard output instead.
	 *
	 * <p>I/O failures are not propagated: the stack trace is printed and the
	 * method returns. Both files are closed in every case.
	 *
	 * @param unigram  path of the input word list (UTF-8, one word per line)
	 * @param idfFile  path of the output file; it is created or overwritten
	 * @param indexDir index directory handed to the {@code SearchFaq} constructor
	 */
	public void getidf(String unigram, String idfFile, String indexDir){

		SearchFaq sf = new SearchFaq(indexDir);
		int totaldoc = sf.totalDoc();
		System.out.println("total doc is "+totaldoc);

		// try-with-resources closes (and therefore flushes) both streams even
		// when reading or writing fails part-way through.
		try (BufferedWriter writer = new BufferedWriter(
					new OutputStreamWriter(
							new FileOutputStream(new File(idfFile)),"UTF8"));
				BufferedReader reader = new BufferedReader(
					new InputStreamReader (
							new FileInputStream (new File(unigram)), "UTF8"))) {
			String word;
			while((word = reader.readLine())!= null){
				int docFreq = sf.docFreq(word);
				if(docFreq!=0){
					double idf = inverseDocumentFrequency(totaldoc, docFreq);
					writer.write(word+" :: "+idf+"\n");
				}
				else{
					// Word has no document frequency: report it on stdout
					// rather than writing an undefined IDF.
					System.out.println(word);
				}
			}
		} catch (IOException e) {
			// Also covers FileNotFoundException and an unsupported charset name.
			e.printStackTrace();
		}
	}

	/**
	 * Returns {@code log10(totalDocs / docFreq)} using floating-point division.
	 *
	 * <p>The cast matters: dividing the two {@code int}s directly would
	 * truncate the ratio before the logarithm is taken.
	 *
	 * @param totalDocs total number of documents
	 * @param docFreq   number of documents containing the word; must be non-zero
	 * @return the base-10 IDF value
	 */
	private static double inverseDocumentFrequency(int totalDocs, int docFreq) {
		return Math.log10((double) totalDocs / docFreq);
	}

	/**
	 * Builds the IDF file for the Hindi FAQ dictionary using the paths defined
	 * in {@code SMSConstant} and {@code LuceneConstants}.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		TfIdf ti = new TfIdf();
		ti.getidf(SMSConstant.hindiFAQdic,SMSConstant.hindiFAQtfidf,LuceneConstants.hindiFAQIndexdir);
		// Alternative inputs, currently disabled:
		//ti.getidf(SMSConstant.yahoounigram,SMSConstant.yahootfidf,LuceneConstants.yahooindexdir);
		//ti.getidf(SMSConstant.mobileDic,SMSConstant.mobiletfidf,LuceneConstants.faqindexdir);
	}

}
